<!doctype html>



  


<html class="theme-next mist use-motion" lang="zh-Hans">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<!-- Viewport: no maximum-scale cap, so users can still pinch-zoom the page. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>



<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />












  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />




  
  
  
  

  
    
    
  

  

  

  

  

  
    
    
    <!-- Lato web font from Google Fonts; loaded over HTTPS, with the query-string ampersand escaped as &amp;. -->
    <link href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.0" rel="stylesheet" type="text/css" />


  <meta name="keywords" content="cassandra,CQL,SQL," />








  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.0" />






<meta name="description" content="从关系型数据库RDBMS迁移到Cassandra的一大难点是数据建模。这对已经熟悉了关系模型设计思想和SQL处理风格的开发人员来说尤其痛苦。但是，新的技术应对新的需求，在某些特定场景具有巨大的优势，因此熟悉并掌握这种新的思想对决策者来说仍然是很必要的。">
<meta property="og:type" content="article">
<meta property="og:title" content="深入理解Cassandra中的数据建模">
<meta property="og:url" content="http://chrisrc.me/2017/02/08/cassandra-data-modeling/index.html">
<meta property="og:site_name" content="山中，山外与俯瞰">
<meta property="og:description" content="从关系型数据库RDBMS迁移到Cassandra的一大难点是数据建模。这对已经熟悉了关系模型设计思想和SQL处理风格的开发人员来说尤其痛苦。但是，新的技术应对新的需求，在某些特定场景具有巨大的优势，因此熟悉并掌握这种新的思想对决策者来说仍然是很必要的。">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-row.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-key-composite.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-column-composite.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-column-family.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-table-single-row.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-concept-table-multi-row.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-cql-query-basic.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-cql-query-basic-storage.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-cql-query-range.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-cql-denormalization-set.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-materialized-view-performance.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-materialized-view-process.png">
<meta property="og:image" content="http://chrisrc.me/images/cassandra-materialized-view-perf2.png">
<meta property="og:updated_time" content="2017-02-08T08:26:12.000Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="深入理解Cassandra中的数据建模">
<meta name="twitter:description" content="从关系型数据库RDBMS迁移到Cassandra的一大难点是数据建模。这对已经熟悉了关系模型设计思想和SQL处理风格的开发人员来说尤其痛苦。但是，新的技术应对新的需求，在某些特定场景具有巨大的优势，因此熟悉并掌握这种新的思想对决策者来说仍然是很必要的。">
<meta name="twitter:image" content="http://chrisrc.me/images/cassandra-concept-row.png">



<script type="text/javascript" id="hexo.configurations">
  // Page-level settings published as two globals:
  //   NexT   - reused if window.NexT already exists, otherwise an empty object
  //   CONFIG - the static option tree below
  var NexT = window.NexT || {};

  var CONFIG = {
    root: '/',
    scheme: 'Mist',

    sidebar: {
      "position": "left",
      "display": "post"
    },

    fancybox: true,
    motion: true,

    duoshuo: {
      userId: '0',
      author: '博主'
    },

    // Algolia credentials are all empty strings on this page.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {
        "per_page": 10
      },
      labels: {
        "input_placeholder": "Search for Posts",
        "hits_empty": "We didn't find any results for the search: ${query}",
        "hits_stats": "${hits} results found in ${time} ms"
      }
    }
  };
</script>



  <link rel="canonical" href="http://chrisrc.me/2017/02/08/cassandra-data-modeling/"/>





  <title> 深入理解Cassandra中的数据建模 | 山中，山外与俯瞰 </title>
</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">

  










  
  
    
  

  <div class="container one-collumn sidebar-position-left page-post-detail ">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-meta ">
  

  <div class="custom-logo-site-title">
    <a href="/"  class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <span class="site-title">山中，山外与俯瞰</span>
      <span class="logo-line-after"><i></i></span>
    </a>
  </div>
  <p class="site-subtitle">一次永不停止的探索之旅</p>
</div>

<div class="site-nav-toggle">
  <!-- Icon-only control: the three bars carry no text, so the accessible name comes from aria-label. -->
  <button type="button" aria-label="切换导航菜单">
    <span class="btn-bar"></span>
    <span class="btn-bar"></span>
    <span class="btn-bar"></span>
  </button>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-dcos">
          <a href="/dcos" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-book"></i> <br />
            
            DC/OS
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/about" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="st-search-show-outputs">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br />
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <!-- Search box: there is no visible label, so aria-label supplies the accessible name. -->
  <form class="site-search-form">
  <input type="text" id="st-search-input" class="st-search-input st-default-search-input" aria-label="搜索" />
</form>

<script type="text/javascript">
  // Swiftype bootstrap, expanded from the one-line embed snippet.
  // 1. Records the name of the global command function under window.SwiftypeObject.
  // 2. If that global does not exist yet, installs a stub that queues every call's
  //    arguments on its own `q` array.
  // 3. Inserts an async <script> for the remote loader before the first <script> in the document.
  (function (win, doc, tagName, loaderUrl, globalName) {
    win['SwiftypeObject'] = globalName;

    if (!win[globalName]) {
      win[globalName] = function () {
        // Look the global up at call time, exactly as the original snippet does.
        var target = win[globalName];
        target.q = target.q || [];
        target.q.push(arguments);
      };
    }

    var loader = doc.createElement(tagName);
    var firstOfKind = doc.getElementsByTagName(tagName)[0];
    loader.async = 1;
    loader.src = loaderUrl;
    firstOfKind.parentNode.insertBefore(loader, firstOfKind);
  })(window, document, 'script', '//s.swiftypecdn.com/install/v2/st.js', '_st');

  // Queue the install command with this site's engine key and the embed version.
  _st('install', 'R9GcmhBwxpsAGLNCsquZ','2.0.0');
</script>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal " itemscope itemtype="http://schema.org/Article">
  <link itemprop="mainEntityOfPage" href="http://chrisrc.me/2017/02/08/cassandra-data-modeling/">

  <span style="display:none" itemprop="author" itemscope itemtype="http://schema.org/Person">
    <meta itemprop="name" content="chrisrc">
    <meta itemprop="description" content="">
    <meta itemprop="image" content="/images/avatar.gif">
  </span>

  <!-- schema.org publisher (Organization) microdata for this article; the whole span is hidden with display:none. -->
  <span style="display:none" itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
    <meta itemprop="name" content="山中，山外与俯瞰">
    <!-- NOTE(review): the logo <img> below has an empty src, so no logo URL is actually supplied; confirm the intended image path. -->
    <span style="display:none" itemprop="logo" itemscope itemtype="http://schema.org/ImageObject">
      <img style="display:none;" itemprop="url image" alt="山中，山外与俯瞰" src="">
    </span>
  </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
            
            
              
                深入理解Cassandra中的数据建模
              
            
          </h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>
              <time title="Post created" itemprop="dateCreated datePublished" datetime="2017-02-08T18:28:18+08:00">
                2017-02-08
              </time>
            

            

            
          </span>

          
            <span class="post-category" >
              <span class="post-meta-divider">|</span>
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/Cassandra/" itemprop="url" rel="index">
                    <span itemprop="name">Cassandra</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <a href="/2017/02/08/cassandra-data-modeling/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count ds-thread-count" data-thread-key="2017/02/08/cassandra-data-modeling/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          

          
          

          

          

        </div>
      </header>
    


    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>从关系型数据库RDBMS迁移到Cassandra的一大难点是数据建模。这对已经熟悉了关系模型设计思想和SQL处理风格的开发人员来说尤其痛苦。但是，新的技术应对新的需求，在某些特定场景具有巨大的优势，因此熟悉并掌握这种新的思想对决策者来说仍然是很必要的。</p>
<a id="more"></a>
<p>本节主要整理自《Cassandra 3.x High Availability》一书的第七章，并汇总一些互联网资料。</p>
<p>大多数传统的关系型数据库使用表格方法存储数据，并支持各种随机访问查询。但是随机磁盘I/O往往是一个显著的瓶颈，因此，为了确保分布式写性能，Cassandra采用<strong>日志结构的存储引擎</strong>，这可以让它将数据顺序写入<strong>提交日志</strong>和Cassandra的持久存储结构<strong>SSTables</strong>。</p>
<h3 id="数据模型"><a href="#数据模型" class="headerlink" title="数据模型"></a>数据模型</h3><p>Cassandra的数据模型:</p>
<ul>
<li>列族作为存储和组织数据的方式</li>
<li>表作为一个多维列族的二维视图</li>
<li>使用Cassandra查询语言（CQL）对表进行操作</li>
</ul>
<!-- Data-model diagrams: each alt text is derived from the heading or list item the image illustrates. -->
<h4 id="Row-Partition"><a href="#Row-Partition" class="headerlink" title="Row(Partition)"></a>Row(Partition)</h4><p><img src="/images/cassandra-concept-row.png" alt="Row（Partition）结构示意图"></p>
<h4 id="Key-Partition-Key"><a href="#Key-Partition-Key" class="headerlink" title="Key (Partition Key)"></a>Key (Partition Key)</h4><ul>
<li>Composite row key</li>
</ul>
<p><img src="/images/cassandra-concept-key-composite.png" alt="Composite row key 示意图"></p>
<ul>
<li>Composite column key</li>
</ul>
<p><img src="/images/cassandra-concept-column-composite.png" alt="Composite column key 示意图"></p>
<h4 id="Column-family-Table"><a href="#Column-family-Table" class="headerlink" title="Column family (Table)"></a>Column family (Table)</h4><p><img src="/images/cassandra-concept-column-family.png" alt="Column family（Table）示意图"></p>
<h4 id="Table-with-single-row-partitions"><a href="#Table-with-single-row-partitions" class="headerlink" title="Table with single-row partitions"></a>Table with single-row partitions</h4><p><img src="/images/cassandra-concept-table-single-row.png" alt="Table with single-row partitions 示意图"></p>
<h4 id="Table-with-multi-row-partitions"><a href="#Table-with-multi-row-partitions" class="headerlink" title="Table with multi-row partitions"></a>Table with multi-row partitions</h4><p><img src="/images/cassandra-concept-table-multi-row.png" alt="Table with multi-row partitions 示意图"></p>
<p><strong>注意，请结合下述内容对上述概念进行理解。</strong></p>
<h3 id="日志结构存储"><a href="#日志结构存储" class="headerlink" title="日志结构存储"></a>日志结构存储</h3><p>Cassandra接收到一次写操作请求时，它会将数据同时写入提交日志和一个称为<strong>memtable</strong>的内存表。提交日志可以确保Cassandra的可靠性，Memtables会周期性的写入磁盘以不可变的SSTables形式保存。</p>
<p>保存在SSTables中的数据拆分为分区（这些分区对应着Primary Key）并按列名称排序。这一点非常重要，本节后续部分会详细探讨。提交日志仅在节点重新启动时用于恢复未及时写入SSTables的数据。</p>
<p>这种存储方案的性能有几个方面与数据建模有重要的关系：</p>
<ul>
<li><p><strong>写不可变性</strong></p>
<p>写总是附加操作，更新数据只需要写入新的值并附加一个新的时间戳（每一列都带有一个时间戳）。</p>
<figure class="highlight json"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div></pre></td><td class="code"><pre><div class="line">[</div><div class="line">    &#123;<span class="attr">"key"</span>: <span class="string">"Jack Jones"</span>,</div><div class="line">    <span class="attr">"cells"</span>: [[<span class="string">"1:"</span>,<span class="string">""</span>,<span class="number">1470229749953090</span>],</div><div class="line">        [<span class="string">"1:project_name"</span>,<span class="string">"Cassandra Tuning"</span>,<span class="number">1470229749953090</span>],</div><div class="line">        [<span class="string">"1:turnover"</span>,<span class="string">"5000000"</span>,<span class="number">1470229749953090</span>],</div><div class="line">        [<span class="string">"2:"</span>,<span class="string">""</span>,<span class="number">1470229928612372</span>],</div><div class="line">        [<span class="string">"2:project_name"</span>,<span class="string">"Spark Layer"</span>,<span class="number">1470229928612372</span>],</div><div class="line">        [<span class="string">"2:turnover"</span>,<span class="string">"2000000"</span>,<span class="number">1470229928612372</span>]]&#125;,</div><div class="line">    &#123;<span class="attr">"key"</span>: <span class="string">"Jill Hill"</span>,</div><div class="line">    <span class="attr">"cells"</span>: [[<span class="string">"1:"</span>,<span class="string">""</span>,<span class="number">1470229908473768</span>],</div><div class="line">        [<span class="string">"1:project_name"</span>,<span 
class="string">"Kubernetes Setup"</span>,<span class="number">1470229908473768</span>],</div><div class="line">        [<span class="string">"1:turnover"</span>,<span class="string">"2000000"</span>,<span class="number">1470229908473768</span>],</div><div class="line">        [<span class="string">"2:"</span>,<span class="string">""</span>,<span class="number">1470229948844042</span>],</div><div class="line">        [<span class="string">"2:project_name"</span>,<span class="string">"Front End"</span>,<span class="number">1470229948844042</span>],</div><div class="line">        [<span class="string">"2:turnover"</span>,<span class="string">"1000000"</span>,<span class="number">1470229948844042</span>]]&#125;</div><div class="line">]</div></pre></td></tr></table></figure>
</li>
<li><p><strong>以最后一次写为准</strong></p>
<p>如果磁盘上同一列存在多个版本，查询该列时，最新的数据被返回。</p>
</li>
<li><p><strong>列无法被物理删除</strong></p>
<p>不可变性也意味着DELETE操作被执行时，数据并没有真正被删除，而是该列的值被一个<code>null</code>值（即墓碑标记，tombstone）所覆盖。</p>
</li>
<li><p><strong>顺序查询效率最高</strong></p>
<p>如果查询是顺序读取磁盘上的数据，可以借助底层存储结构的优势从而获得最大化的读性能。通常情况下，Cassandra会尽量限制用户只使用顺序查询，当然也有例外。</p>
</li>
</ul>
<h3 id="理解压缩（Compaction）"><a href="#理解压缩（Compaction）" class="headerlink" title="理解压缩（Compaction）"></a>理解压缩（Compaction）</h3><p>Cassandra通过一种称之为压缩（Compaction）的机制来处理随着时间推移而不断膨胀的SSTables。压缩将分散在多个文件中的分区（partitions）聚合成一个文件，并删除旧的数据，丢弃tombstones。释放空间仅是其中的一个目的，另一个重要的原因是通过将数据转移到一个SSTables中，可以降低跨文件或节点读取Key的磁盘I/O从而显著提高读的性能。</p>
<p>Cassandra提供了多种压缩策略，自3.8（或3.0.8）开始新增了<strong>Time-window</strong>压缩策略用于取代<strong>Date-tiered</strong>压缩策略。</p>
<ul>
<li><p>Size-tiered策略</p>
</li>
<li><p>Leveled策略</p>
</li>
<li><p>Time-window策略</p>
</li>
</ul>
<h3 id="CQL"><a href="#CQL" class="headerlink" title="CQL"></a>CQL</h3><p>CQL已经取代Thrift成为与Cassandra交互的标准接口。在了解CQL之前必须要意识到CQL的数据形式并不总是与底层的数据存储结构相匹配，而且，CQL也不是SQL，你必须理解CQL表现形式的真正含义才能避免设计出与Cassandra理念背道而驰的数据模型。</p>
<p>下面详解CQL语句与底层存储的转换。</p>
<h4 id="单个Primary-Key"><a href="#单个Primary-Key" class="headerlink" title="单个Primary Key"></a>单个Primary Key</h4><p>下面是一个名为<strong>books</strong>的表，仅有一个<strong>title</strong>主键：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE books (</div><div class="line">    title text,</div><div class="line">    author text,</div><div class="line">    year int,</div><div class="line">PRIMARY KEY (title)</div><div class="line">);</div></pre></td></tr></table></figure>
<p>通过下述语句插入两条数据：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line">INSERT INTO books (title, author, year) VALUES (&apos;Patriot Games&apos;, &apos;Tom Clancy&apos;, 1987);</div><div class="line">INSERT INTO books (title, author, year) VALUES (&apos;Without Remorse&apos;, &apos;Tom Clancy&apos;, 1993);</div></pre></td></tr></table></figure>
<p>查询时会得到下述结果：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line">SELECT * FROM books;</div><div class="line"></div><div class="line">title | author | year</div><div class="line">-----------------+------------+------</div><div class="line">Without Remorse | Tom Clancy | 1993</div><div class="line">Patriot Games | Tom Clancy | 1987</div></pre></td></tr></table></figure>
<p>这看上去与传统的ANSI SQL非常相似，但是对应着Cassandra的底层存储却是完全不同。</p>
<p>在存储层，数据用一个Row Key即title和一个name/value组成的列集合表示。每个列都有一个时间戳用于处理冲突。</p>
<p>系统存储时根据Row Key的哈希值在Cassandra集群节点中分布式存储，因此查询返回的结果是无序的。相比之下，列集合中的数据是根据列名称按自然顺序排列的。因此上例中<strong>author</strong>排在<strong>year</strong>的前面。<strong>这一点对于构建高效的数据模型至关重要</strong>。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line">Row Key: Without Remorse</div><div class="line">    =&gt; (name=author, value=Tom Clancy, timestamp=1393102991499000)</div><div class="line">    =&gt; (name=year, value=1993, timestamp=1393102991499000)</div><div class="line">Row Key: Patriot Games</div><div class="line">    =&gt; (name=author, value=Tom Clancy, timestamp=1393102991499100)</div><div class="line">    =&gt; (name=year, value=1987, timestamp=1393102991499100)</div></pre></td></tr></table></figure>
<p><strong>注意</strong>，这是旧的pre-3.0 CLI输出，仅用于理解概念，下述同。</p>
<h4 id="Compound-Keys"><a href="#Compound-Keys" class="headerlink" title="Compound Keys"></a>Compound Keys</h4><p>下面看一个authors表，这个表使用<strong>name</strong>，<strong>year</strong>和<strong>title</strong>作为组合主键。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">    name text,</div><div class="line">    year int,</div><div class="line">    title text,</div><div class="line">    isbn text,</div><div class="line">    publisher text,</div><div class="line">PRIMARY KEY (name, year, title)</div><div class="line">);</div></pre></td></tr></table></figure>
<p>插入数据后通过CQL查询显示如下：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line">name | year | title | isbn | publisher</div><div class="line">------------+------+-----------------+---------------+-----------</div><div class="line">Tom Clancy | 1987 | Patriot Games | 0-399-13241-4 | Putnam</div><div class="line">Tom Clancy | 1993 | Without Remorse | 0-399-13825-0 | Putnam</div></pre></td></tr></table></figure>
<p>在深入理解底层的存储转换之前，先来理解两个概念：<strong>partition keys</strong> 和 <strong>clustering columns</strong>。</p>
<ul>
<li><strong>Partition keys</strong></li>
</ul>
<p>在声明主键时，主键列表中的第一个字段必定是分区Key。这个分区Key在存储层直接作为Row Key，通过哈希算法随机地存储在集群节点上。大多数查询需要提供分区Key，以便于Cassandra能够知道请求的数据存储在哪个节点上。</p>
<ul>
<li><strong>Clustering columns</strong></li>
</ul>
<p>主键列表中除第一个字段外，剩余的字段称之为Clustering columns，它们决定了数据在磁盘上的存储顺序。这些字段不属于分区Key，因此对于定位数据存储的节点没有任何帮助。但是，这些字段决定着用户能够如何对数据进行查询，下面会详细介绍。</p>
<p>因此，主键列表可以用如下方式表示：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">PRIMARY KEY (partition_key, clustering1, clustering2)</div></pre></td></tr></table></figure>
<p>区分了上面的差异，下面来看authors表对应的底层存储：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line">Row Key: Tom Clancy</div><div class="line">    =&gt; (name = 1987:Patriot Games:ISBN, value = 0-399-13241-4)</div><div class="line">    =&gt; (name = 1987:Patriot Games:publisher, value = Putnam)</div><div class="line">    =&gt; (name = 1993:Without Remorse:ISBN, value = 0-399-13825-0)</div><div class="line">    =&gt; (name = 1993:Without Remorse:publisher, value = Putnam)</div></pre></td></tr></table></figure>
<p>从上面的存储结构可以看到，CQL中的两条记录在存储时成为一条记录（列族），因为这两条记录拥有相同的分区Key。另一个有趣的地方是<strong>year</strong>和<strong>title</strong>列值的位置，它们被存储为列名称的一部分，而不是列值。注意，这只是用于展示底层的存储结构概念，实际存储中Cassandra对数据做了优化。</p>
<p>还需要注意的是，行集中行的顺序是先<strong>year</strong>后<strong>title</strong>，这跟它们在主键列表中定义的顺序是一致的。也可以通过<code>WITH CLUSTERING ORDER BY</code>语句改变行的排序规则，例如：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">    name text,</div><div class="line">    year int,</div><div class="line">    title text,</div><div class="line">    isbn text,</div><div class="line">    publisher text,</div><div class="line">PRIMARY KEY (name, year, title) )</div><div class="line">WITH CLUSTERING ORDER BY (year DESC);</div></pre></td></tr></table></figure>
<p>再对数据进行查询时，结果将显示为：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line">name | year | title | isbn | publisher</div><div class="line">-----------+------+-----------------+---------------+-----------</div><div class="line">Tom Clancy | 1993 | Without Remorse | 0-399-13825-0 | Putnam</div><div class="line">Tom Clancy | 1987 | Patriot Games | 0-399-13241-4 | Putnam</div></pre></td></tr></table></figure>
<p>这种建模的差异对查询操作影响很大，具体请参考后续CQL查询部分。</p>
<h4 id="Composite-partition-keys"><a href="#Composite-partition-keys" class="headerlink" title="Composite partition keys"></a>Composite partition keys</h4><p>前例中都是单个的分区Key，也可以使用多个字段创建组合的分区Key。使用组合的分区Key的目的是用来优化数据的分布式存储策略，从而提升数据查询的速度。一个重要的例子是建模时间序列数据时使用时间桶（buckets）作为分区Key。</p>
<p>下面来看组合分区Key的定义：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">    name text,</div><div class="line">    year int,</div><div class="line">    title text,</div><div class="line">    isbn text,</div><div class="line">    publisher text,</div><div class="line">PRIMARY KEY (( name, year), title) );</div></pre></td></tr></table></figure>
<p>在主键定义列表中，通过括号将<strong>name</strong>和<strong>year</strong>括起来，表示这是一个组合的分区Key。</p>
<p>在存储层，year从列名称的一部分变成了<strong>Row Key</strong>的一部分，如下所示：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">Row Key: Tom Clancy:1993</div><div class="line">    =&gt; (name = Without Remorse:isbn, value = 0-399-13825-0)</div><div class="line">    =&gt; (name = Without Remorse:publisher, value = 5075746e616d)</div><div class="line">-------------------</div><div class="line">Row Key: Tom Clancy:1987</div><div class="line">    =&gt; (name = Patriot Games:isbn, value = 0-399-13241-4)</div><div class="line">    =&gt; (name = Patriot Games:publisher, value = 5075746e616d)</div></pre></td></tr></table></figure>
<h4 id="存储模型的重要性"><a href="#存储模型的重要性" class="headerlink" title="存储模型的重要性"></a>存储模型的重要性</h4><p>理解Cassandra的底层存储模型是非常重要的，主要体现在：</p>
<ul>
<li>查询操作必须要考虑到底层的数据存储方式。如果你无法理解数据是如何存储的，你的查询可能会经常碰到各种错误信息，最坏的情况是性能非常差。</li>
<li>对于分区Key的选择要慎重考虑，在查询时它必须是可知的条件而且要能够很好的分散在集群中。在建模时要避免少量的分区Key对应大量的列，这会严重影响数据在集群中的分布。</li>
<li>由于Cassandra采用日志存储结构，使用区间查询时会获得很好的性能。简单来说，区间查询就是针对一个给定的分区Key按照列数据存储顺序查询其中的一部分。注意，区间查询不能跨多个分区，因为不同的分区在磁盘上对应不同的物理存储位置。</li>
<li>要慎重选择clustering columns的顺序，它们的顺序决定着数据在磁盘上的存放顺序，进而影响你能够对这些数据做什么样的查询。</li>
</ul>
<p>在关系型数据建模时，结构规范化是要优先考虑的。与之不同，<strong>在Cassandra中进行建模时，数据如何查询是优先考虑的</strong>，这一原则必须牢记。</p>
<h3 id="理解CQL查询"><a href="#理解CQL查询" class="headerlink" title="理解CQL查询"></a>理解CQL查询</h3><p>下面以下述authors表模型介绍CQL的各种查询：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors ( name text, year int, title text, isbn text, publisher text,</div><div class="line">PRIMARY KEY (name, year, title) ) WITH CLUSTERING ORDER BY (year DESC);</div></pre></td></tr></table></figure>
<h4 id="Query-by-key"><a href="#Query-by-key" class="headerlink" title="Query by key"></a>Query by key</h4><p>下面看一个使用Key的基本查询：<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">SELECT * FROM authors WHERE name = &apos;Tom Clancy&apos;;</div></pre></td></tr></table></figure></p>
<p>对于这个简单查询，请求首先发送到一个协调节点，这个节点拥有一个我们查询Key的副本。然后，这个节点从另一个副本存储节点获取该行（列族）以满足Quorum的要求。因此，需要两个节点才能满足此查询的要求。</p>
<p><img src="/images/cassandra-cql-query-basic.png" alt="按分区Key查询时协调节点与副本节点的交互示意图"></p>
<p>在存储层，该查询首先定位分区Key的位置，然后按照列自然排序规则顺序扫描所有的列。因此，尽管看起来是一个简单查询，存储层仍然将其转换为区间查询。</p>
<p><img src="/images/cassandra-cql-query-basic-storage.png" alt="存储层定位分区Key并顺序扫描所有列的示意图"></p>
<h4 id="Range-Queries"><a href="#Range-Queries" class="headerlink" title="Range Queries"></a>Range Queries</h4><p>下面来看一个区间查询的具体操作：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">SELECT * FROM authors WHERE name = &apos;Tom Clancy&apos; AND year &gt;= 1993;</div></pre></td></tr></table></figure>
<p>此查询仍然只选择一个分区，因此跟前例相同，查询只需要两个节点参与。与前例不同的是，Cassandra在扫描到一条不符合条件的记录就会停止扫描。</p>
<p><img src="/images/cassandra-cql-query-range.png" alt="区间查询在存储层的扫描过程示意图"></p>
<p>此例中，扫描到year值为1991时，Cassandra就知道没有更多列需要扫描了。因此这个查询的效率是非常高的。</p>
<p>从上面的两个示例我们可以总结以下三点：</p>
<ol>
<li>顺序查询是高效的，它充分利用了Cassandra在存储层的自然排序存储机制。</li>
<li>以分区Key或者分区Key与Clustering columns组合条件的查询在存储层是顺序查询，这也意味着这类查询是优化的。</li>
<li>尽量按照你读数据的方式去写数据。这与关系型数据库建模是不同的。</li>
</ol>
<h4 id="拥抱反范式"><a href="#拥抱反范式" class="headerlink" title="拥抱反范式"></a>拥抱反范式</h4><p>关系型数据库中的范式建模在很大程度上会导致客户端使用联合查询（joins）。以authors表为例，author和book的一对多关系会要求将books建为另一张表，每个表都有一个ID主键，books表有一个authorID作为外键。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors ( authorID int, name varchar( 50), PRIMARY KEY (authorID) )</div><div class="line"></div><div class="line">CREATE TABLE books ( bookID int, authorID int, name varchar( 100), year int, INDEX auth_ind (authorID), FOREIGN KEY (authorID) REFERENCES authors( authorID) )</div></pre></td></tr></table></figure>
<p>这种联合查询在关系型数据库中很常见，但是放到Cassandra这种分布式数据库中，一次联合查询需要跨多个表，而一个表需要跨多个节点，其复杂性和效率可想而知。</p>
<p>在Cassandra中，使用一个authors表和一个books集合就可以解决这种需求。</p>
<p>尽管在Cassandra中仍然可以采用<strong>二级索引</strong>（secondary index）避免反范式数据模型，但这并不是好的方案。</p>
<h3 id="使用集合实现反范式"><a href="#使用集合实现反范式" class="headerlink" title="使用集合实现反范式"></a>使用集合实现反范式</h3><p>Cassandra支持三种集合类型：Sets，Lists 和 Maps。在具体介绍之前首先要知道使用集合的几个原则：</p>
<ul>
<li>集合中的每个数据项的大小不能超过64KB</li>
<li>在同一个集合中最多可以存储64000个数据项</li>
<li>查询一个集合总是会返回整个集合</li>
<li>集合最好用来建模小的有限的数据集</li>
</ul>
<h4 id="Sets"><a href="#Sets" class="headerlink" title="Sets"></a>Sets</h4><p>CQL中的Set与编程语言中的Set类似，集合中的数据项是唯一的。稍有差异的是，在Cassandra中，Set中的数据项是按自然顺序排序的。</p>
<p>下面来看一个包含了books集合的authors表：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">    name text,</div><div class="line">    books set&lt;text&gt;,</div><div class="line">PRIMARY KEY (name) );</div></pre></td></tr></table></figure>
<p>可以对这个表进行<strong>INSERT</strong>和<strong>UPDATE</strong>操作：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">INSERT INTO authors (name, books) VALUES (&apos;Tom Clancy&apos;, &#123;&apos;Without Remorse&apos;, &apos;Patriot Games&apos;&#125;);</div><div class="line"></div><div class="line">-- 向集合中添加数据</div><div class="line">UPDATE authors SET books = books + &#123;&apos;Red Storm Rising&apos;&#125; WHERE name = &apos;Tom Clancy&apos;;</div><div class="line"></div><div class="line">-- 从集合中移除数据</div><div class="line">UPDATE authors SET books = books - &#123;&apos;Red Storm Rising&apos;&#125; WHERE name = &apos;Tom Clancy&apos;;</div></pre></td></tr></table></figure>
<p>在存储层，Set集合的数据项被存储为列名称，列值留空。这可以确保数据的唯一性，因为任何更改只需要覆盖旧的列名称。</p>
<p>books在存储层的示意结构如下图所示：</p>
<p><img src="/images/cassandra-cql-denormalization-set.png" alt="books集合在存储层的结构示意图"></p>
<p>可以看到set集合的名称作为列名称的一部分，集合中的数据项作为列名称的另一部分。</p>
<h4 id="Lists"><a href="#Lists" class="headerlink" title="Lists"></a>Lists</h4><p>从CQL层看，Lists与Sets非常相似：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">    name text,</div><div class="line">    books list&lt;text&gt;,</div><div class="line">PRIMARY KEY (name)</div><div class="line">);</div></pre></td></tr></table></figure>
<p>对Lists集合的<strong>INSERT</strong>和<strong>UPDATE</strong>操作：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div></pre></td><td class="code"><pre><div class="line">INSERT INTO authors (name, books)</div><div class="line">VALUES (&apos;Tom Clancy&apos;, [&apos;Without Remorse&apos;, &apos;Patriot Games&apos;]);</div><div class="line"></div><div class="line">-- 由于List是有序的，因此CQL支持prepend和append操作</div><div class="line"></div><div class="line">UPDATE authors</div><div class="line">SET books = books + [&apos;Red Storm Rising&apos;]</div><div class="line">WHERE name = &apos;Tom Clancy&apos;;</div><div class="line"></div><div class="line">UPDATE authors</div><div class="line">SET books = [&apos;Red Storm Rising&apos;] + books</div><div class="line">WHERE name = &apos;Tom Clancy&apos;;</div><div class="line"></div><div class="line">-- 可以删除指定name的数据项</div><div class="line">UPDATE authors</div><div class="line">SET books = books - [&apos;Red Storm Rising&apos;]</div><div class="line">WHERE name = &apos;Tom Clancy&apos;;</div></pre></td></tr></table></figure>
<p>不同于Set集合，List集合在存储层将数据项放置在列值中，列名称用UUID来确保集合顺序。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div></pre></td><td class="code"><pre><div class="line">Row Key: Tom Clancy</div><div class="line">    =&gt; (name=books:d36de8b0305011e4a0dddbbeade718be, value=576974686f75742052656d6f727365)</div><div class="line">    =&gt; (name=books:d36de8b1305011e4a0dddbbeade718be, value=50617472696f742047616d6573)</div></pre></td></tr></table></figure>
<h4 id="Maps"><a href="#Maps" class="headerlink" title="Maps"></a>Maps</h4><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE authors (</div><div class="line">name text,</div><div class="line">books map&lt;text, int&gt;,</div><div class="line">PRIMARY KEY (name)</div><div class="line">);</div></pre></td></tr></table></figure>
<p>对Map集合执行<strong>INSERT</strong>，<strong>UPDATE</strong>和<strong>DELETE</strong>操作：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div></pre></td><td class="code"><pre><div class="line">INSERT INTO authors (name, books)</div><div class="line">VALUES (&apos;Tom Clancy&apos;, &#123;&apos;Without Remorse&apos;:1993, &apos;Patriot Games&apos;:1987&#125;);</div><div class="line"></div><div class="line">UPDATE authors</div><div class="line">SET books[&apos;Red Storm Rising&apos;] = 1986</div><div class="line">WHERE name = &apos;Tom Clancy&apos;;</div><div class="line"></div><div class="line">DELETE books[&apos;Red Storm Rising&apos;]</div><div class="line">FROM authors WHERE name = &apos;Tom Clancy&apos;;</div></pre></td></tr></table></figure>
<p>在存储层，Map与List的存储类似，差别是排序的ID被替换成Map的Key：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div></pre></td><td class="code"><pre><div class="line">RowKey: Tom Clancy</div><div class="line">    =&gt; (name=books:50617472696f742047616d6573, value=000007c3)</div><div class="line">    =&gt; (name=books:576974686f75742052656d6f727365, value=000007c9)</div></pre></td></tr></table></figure>
<p>从上述存储结构可以看到，集合类型使用了类似Clustering columns的组合列存储，但是，当前Cassandra并不支持集合类型的区间查询。</p>
<h3 id="使用Materialized-views（MV）实现反范式"><a href="#使用Materialized-views（MV）实现反范式" class="headerlink" title="使用Materialized views（MV）实现反范式"></a>使用Materialized views（MV）实现反范式</h3><p>在很多场景中，你可能需要使用不同的主键来查询数据。为了能够使用分区Key并顺序读取数据，在3.0版本之前，需要创建多张表，每张表对应不同的查询类型。</p>
<p>Cassandra从3.0版本开始，提供了一种称之为Materialized views的特性，该功能负责自动构建数据的不同视图来满足不同类型的查询。</p>
<p>创建Materialized views非常简单，以前述authors表为例，如果需要查询给定<strong>year</strong>的所有<strong>authors</strong>，可定义如下视图：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">CREATE MATERIALIZED VIEW books_by_year AS</div><div class="line">    SELECT *</div><div class="line">    FROM authors</div><div class="line">    WHERE year IS NOT NULL</div><div class="line">        AND name IS NOT NULL</div><div class="line">        AND title IS NOT NULL</div><div class="line">PRIMARY KEY (year, name, title);</div></pre></td></tr></table></figure>
<p>当向authors表中插入新数据时，Cassandra会自动更新该视图，以便于按下述条件查询：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line">SELECT * FROM books_by_year</div><div class="line">WHERE year = 1987;</div></pre></td></tr></table></figure>
<p>Materialized views要求其主键中的每一列都必须非空（<code>non-null</code>），因此上例在视图定义中对每个主键列都使用了<code>IS NOT NULL</code>限定。创建Materialized views时也可以在WHERE语句中使用数据过滤，如：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">CREATE MATERIALIZED VIEW clancy_books AS</div><div class="line">    SELECT *</div><div class="line">    FROM authors</div><div class="line">    WHERE name = &apos;Tom Clancy&apos;</div><div class="line">        AND title IS NOT NULL</div><div class="line">        AND year IS NOT NULL</div><div class="line">PRIMARY KEY (name, title, year);</div></pre></td></tr></table></figure>
<p>使用Materialized views时，当数据出现变更，Cassandra需要同时写入基本表和视图表。这是有性能损耗的，但相比于二级索引，MV仍然保持很高的性能。下图是官方的对比数据：</p>
<p><img src="/images/cassandra-materialized-view-performance.png" alt=""></p>
<p>MV的工作机制如下图所示：</p>
<p><img src="/images/cassandra-materialized-view-process.png" alt=""></p>
<p>使用了MV之后，更新/插入操作需要先read然后再写，此外还要做一致性检查，都会存在性能损耗。</p>
<p>下图是官方给出的原始写（raw）与采用了MV的写性能对比：</p>
<p><img src="/images/cassandra-materialized-view-perf2.png" alt=""></p>
<p>总之，使用MV时要注意以下几点：</p>
<ul>
<li>从MV读取数据跟从原始表读数据的性能是一样的</li>
<li>写数据的时候，每增加一个MV，写性能会下降10%。</li>
<li>如果Primary Key只有1个，MV的性能要好于人工反范式（手工维护多张表）。</li>
<li>对于复合主键（Compound primary keys），MV相对来说还是要快的，但是当复合主键太长（比如&gt;100）的时候，人工可能还要快一些。</li>
<li>如果原始表有大规模删除，MV需要发起很多CQL来查询与删除。性能可能会急剧下降</li>
<li>MV主键的设计还是要考虑是否会造成热点（hot spot）。hot spot 依然会造成性能问题，甚至OOM</li>
<li>当前MV仅支持select。</li>
</ul>
<h3 id="操纵时间序列数据"><a href="#操纵时间序列数据" class="headerlink" title="操纵时间序列数据"></a>操纵时间序列数据</h3><p>近二十多年来，数据建模都是围绕着实体关系进行。最近几年，人们开始给数据加入<strong>时间</strong>维度，即除了实体间的关系，大家开始关注这些关系是如何随时间变化的。关系型建模和时间序列数据建模的一个有趣而重要的差异是：关系数据是可变的，而时间序列数据是不可变的。可变数据是不稳定的，因为它可能随时改变。为保证我们拿到的是最新的数据版本，处理过程会变得非常复杂。相比之下，不可变数据是稳定的，这意味着我们可以避免与随时间变化的数据相关的许多复杂性。</p>
<p>下面是一个定义传感器数据表的示例：</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE sensor_readings (</div><div class="line">    sensorID uuid,</div><div class="line">    time_bucket int,</div><div class="line">    timestamp bigint,</div><div class="line">    reading decimal,</div><div class="line"> PRIMARY KEY ((sensorID, time_bucket), timestamp)</div><div class="line"> ) WITH CLUSTERING ORDER BY (timestamp DESC);</div></pre></td></tr></table></figure>
<h3 id="参考"><a href="#参考" class="headerlink" title="参考"></a>参考</h3><ul>
<li><a href="https://pandaforme.gitbooks.io/introduction-to-cassandra/content/understand_the_cassandra_data_model.html" target="_blank" rel="external">https://pandaforme.gitbooks.io/introduction-to-cassandra/content/understand_the_cassandra_data_model.html</a></li>
<li><a href="http://www.flyml.net/2016/10/30/cassandra-tutorial-materialized-view" target="_blank" rel="external">http://www.flyml.net/2016/10/30/cassandra-tutorial-materialized-view</a></li>
<li><a href="https://opencredo.com/cassandra-data-modelling-patterns/" target="_blank" rel="external">https://opencredo.com/cassandra-data-modelling-patterns/</a></li>
</ul>

      
    </div>

    <div>
      
        

      
    </div>

    <div>
      
        

      
    </div>


    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/cassandra/" rel="tag"># cassandra</a>
          
            <a href="/tags/CQL/" rel="tag"># CQL</a>
          
            <a href="/tags/SQL/" rel="tag"># SQL</a>
          
        </div>
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/02/05/cassandra-basic-concepts/" rel="next" title="Cassandra中的基本概念（转）">
                <i class="fa fa-chevron-left"></i> Cassandra中的基本概念（转）
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/02/10/storm-topology-shutdown/" rel="prev" title="Storm拓扑自动重启">
                Storm拓扑自动重启 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </article>



    <div class="post-spread">
      
        <div class="ds-share flat" data-thread-key="2017/02/08/cassandra-data-modeling/"
     data-title="深入理解Cassandra中的数据建模"
     data-content=""
     data-url="http://chrisrc.me/2017/02/08/cassandra-data-modeling/">
  <div class="ds-share-inline">
    <ul  class="ds-share-icons-16">

      <li data-toggle="ds-share-icons-more"><a class="ds-more" href="javascript:void(0);">分享到：</a></li>
      <li><a class="ds-weibo" href="javascript:void(0);" data-service="weibo">微博</a></li>
      <li><a class="ds-qzone" href="javascript:void(0);" data-service="qzone">QQ空间</a></li>
      <li><a class="ds-qqt" href="javascript:void(0);" data-service="qqt">腾讯微博</a></li>
      <li><a class="ds-wechat" href="javascript:void(0);" data-service="wechat">微信</a></li>

    </ul>
    <div class="ds-share-icons-more">
    </div>
  </div>
</div>
      
    </div>
  </div>


          </div>
          


          
  <div class="comments" id="comments">
    
      <div class="ds-thread" data-thread-key="2017/02/08/cassandra-data-modeling/"
           data-title="深入理解Cassandra中的数据建模" data-url="http://chrisrc.me/2017/02/08/cassandra-data-modeling/">
      </div>
    
  </div>


        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap" >
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <img class="site-author-image" itemprop="image"
               src="/images/avatar.gif"
               alt="chrisrc" />
          <p class="site-author-name" itemprop="name">chrisrc</p>
          <p class="site-description motion-element" itemprop="description"></p>
        </div>
        <nav class="site-state motion-element">
          <div class="site-state-item site-state-posts">
            <a href="/archives">
              <span class="site-state-item-count">69</span>
              <span class="site-state-item-name">日志</span>
            </a>
          </div>

          
            <div class="site-state-item site-state-categories">
              <a href="/categories">
                <span class="site-state-item-count">21</span>
                <span class="site-state-item-name">分类</span>
              </a>
            </div>
          

          
            <div class="site-state-item site-state-tags">
              <a href="/tags">
                <span class="site-state-item-count">162</span>
                <span class="site-state-item-name">标签</span>
              </a>
            </div>
          

        </nav>

        

        <div class="links-of-author motion-element">
          <!-- External profile links. They open in a new tab, so rel="noopener noreferrer"
               keeps the opened page from reaching back through window.opener. The icon
               glyphs are decorative (the link text already names the destination) and are
               hidden from assistive technology. -->
          <span class="links-of-author-item">
            <a href="https://github.com/christtrc" target="_blank" rel="noopener noreferrer" title="GitHub">
              <i class="fa fa-fw fa-github" aria-hidden="true"></i>
              GitHub
            </a>
          </span>
          <span class="links-of-author-item">
            <a href="https://twitter.com/chrisrc" target="_blank" rel="noopener noreferrer" title="Twitter">
              <i class="fa fa-fw fa-twitter" aria-hidden="true"></i>
              Twitter
            </a>
          </span>
        </div>

        
        
          <div class="cc-license motion-element" itemprop="license">
            <!-- The image is the only content of the link, so its alt text names the
                 link destination (the specific license) rather than just the brand. -->
            <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" class="cc-opacity" target="_blank" rel="noopener noreferrer">
              <img src="/images/cc-by-nc-sa.svg" alt="Creative Commons BY-NC-SA 4.0 license" />
            </a>
          </div>
        

        
        

        


      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#数据模型"><span class="nav-number">1.</span> <span class="nav-text">数据模型</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#Row-Partition"><span class="nav-number">1.1.</span> <span class="nav-text">Row(Partition)</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Key-Partition-Key"><span class="nav-number">1.2.</span> <span class="nav-text">Key (Partition Key)</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Column-family-Table"><span class="nav-number">1.3.</span> <span class="nav-text">Column family (Table)</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Table-with-single-row-partitions"><span class="nav-number">1.4.</span> <span class="nav-text">Table with single-row partitions</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Table-with-multi-row-partitions"><span class="nav-number">1.5.</span> <span class="nav-text">Table with multi-row partitions</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#日志结构存储"><span class="nav-number">2.</span> <span class="nav-text">日志结构存储</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#理解压缩（Compaction）"><span class="nav-number">3.</span> <span class="nav-text">理解压缩（Compaction）</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#CQL"><span class="nav-number">4.</span> <span class="nav-text">CQL</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#单个Primary-Key"><span class="nav-number">4.1.</span> <span class="nav-text">单个Primary Key</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Compound-Keys"><span class="nav-number">4.2.</span> <span class="nav-text">Compound Keys</span></a></li><li class="nav-item nav-level-4"><a 
class="nav-link" href="#Composite-partition-keys"><span class="nav-number">4.3.</span> <span class="nav-text">Composite partition keys</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#存储模型的重要性"><span class="nav-number">4.4.</span> <span class="nav-text">存储模型的重要性</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#理解CQL查询"><span class="nav-number">5.</span> <span class="nav-text">理解CQL查询</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#Query-by-key"><span class="nav-number">5.1.</span> <span class="nav-text">Query by key</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Range-Queries"><span class="nav-number">5.2.</span> <span class="nav-text">Range Queries</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#拥抱反范式"><span class="nav-number">5.3.</span> <span class="nav-text">拥抱反范式</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#使用集合实现反范式"><span class="nav-number">6.</span> <span class="nav-text">使用集合实现反范式</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#Sets"><span class="nav-number">6.1.</span> <span class="nav-text">Sets</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Lists"><span class="nav-number">6.2.</span> <span class="nav-text">Lists</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Maps"><span class="nav-number">6.3.</span> <span class="nav-text">Maps</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#使用Materialized-views（MV）实现反范式"><span class="nav-number">7.</span> <span class="nav-text">使用Materialized views（MV）实现反范式</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#操纵时间序列数据"><span class="nav-number">8.</span> <span class="nav-text">操纵时间序列数据</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" 
href="#参考"><span class="nav-number">9.</span> <span class="nav-text">参考</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright" >
  
  &copy;  2015 - 
  <span itemprop="copyrightYear">2017</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">
     
      <a href="mailto:chrisrc@qq.com">chrisrc</a> 
    
  </span>
  <span class="author"> &nbsp; QQ: 58122705 </span>
</div>


<div class="powered-by">
  由 <a class="theme-link" href="https://hexo.io">Hexo</a> 强力驱动
</div>

<div class="theme-info">
  主题 -
  <a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
    NexT.Mist
  </a>
</div>


        

        
      </div>
    </footer>

    <div class="back-to-top">
      <i class="fa fa-arrow-up"></i>
    </div>
  </div>

  

<script type="text/javascript">
  // Reset window.Promise to null unless its Object.prototype.toString tag is
  // exactly '[object Function]'; otherwise leave it untouched.
  (function () {
    var typeTag = Object.prototype.toString.call(window.Promise);
    if (typeTag === '[object Function]') {
      return;
    }
    window.Promise = null;
  })();
</script>









  



  
  <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>

  
  <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>

  
  <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.0"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.0"></script>



  
  

  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.0"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.0"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.0"></script>



  

  
    
  

  <script type="text/javascript">
    // Global settings object; presumably read by the Duoshuo embed script once it loads
    // (NOTE(review): confirm against embed.js).
    var duoshuoQuery = {short_name:"chrisrc"};
    (function() {
      // Use https only when the page itself is https; every other protocol falls back to http.
      var scheme = document.location.protocol == 'https:' ? 'https:' : 'http:';

      // Build the async <script> element for the embed code.
      var loader = document.createElement('script');
      loader.id = 'duoshuo-script';
      loader.type = 'text/javascript';
      loader.charset = 'UTF-8';
      loader.async = true;
      loader.src = scheme + '//static.duoshuo.com/embed.js';

      // Attach to <head> when present, otherwise to <body>.
      var host = document.getElementsByTagName('head')[0]
        || document.getElementsByTagName('body')[0];
      host.appendChild(loader);
    })();
  </script>

  
    
    
    <script src="/lib/ua-parser-js/dist/ua-parser.min.js?v=0.7.9"></script>
    <script src="/js/src/hook-duoshuo.js"></script>
  








  
  

  

  

  
<script>
// Inject the Baidu link-submit script before the first <script> element on the page,
// picking the URL that matches the page's protocol.
(function(){
    var pushScript = document.createElement('script');
    var scheme = window.location.protocol.split(':')[0];
    pushScript.src = (scheme === 'https')
        ? 'https://zz.bdstatic.com/linksubmit/push.js'
        : 'http://push.zhanzhang.baidu.com/push.js';
    var firstScript = document.getElementsByTagName("script")[0];
    firstScript.parentNode.insertBefore(pushScript, firstScript);
})();
</script>


  
<script>
// Write a <script id="sozz"> tag into the document while it is being parsed.
// The plain-http URL is used only when the page protocol is exactly "http:";
// any other protocol gets the https URL.
(function(){
   var insecureUrl = "http://js.passport.qihucdn.com/11.0.1.js?ed903ab05fbc5fa64d95e4bfbf3d3a04";
   var secureUrl = "https://jspassport.ssl.qhimg.com/11.0.1.js?ed903ab05fbc5fa64d95e4bfbf3d3a04";
   var src = secureUrl;
   if (document.location.protocol == "http:") {
      src = insecureUrl;
   }
   // The closing tag is escaped so the HTML parser does not end the enclosing script early.
   document.write('<script src="' + src + '" id="sozz"><\/script>');
})();
</script>

  


</body>
</html>
